# Create data frame filter for 5 locations and year 2017, select size (remove all -9999 values and turn data into case-format)
compare_ls <- select(size_abund,YEAR,SITE,SIZE,COUNT) %>%
filter(YEAR == 2017,
SITE=="AQUE"|SITE=="NAPL"|SITE=="MOHK"|SITE=="IVEE"|SITE=="CARP",
SIZE!= "-99999")
#Remove multiple counts and organize into rows
ls_comp <- uncount(compare_ls, weights = COUNT, .remove = TRUE, .id = NULL)
#Count in decimal numbers ?
#View(counts)
View (ls_comp)
# Exploratory graphs
hist_ls <- ggplot(ls_comp, aes(x = SIZE))+
geom_histogram(bins = 10) +
facet_wrap(~ SITE, scale = "free")
hist_ls
qq_ls <-ggplot(ls_comp, aes(sample = SIZE)) +
geom_qq(bins = 10) +
facet_wrap(~ SITE, scale = "free")
## Warning: Ignoring unknown parameters: bins
qq_ls
# Groups approx. normally distributed
box_ls <- ggplot(ls_comp, aes(x = SITE, y = SIZE)) +
geom_boxplot(width = .4) +
geom_jitter(width = 0.1, alpha = 0.5, aes(color = SITE))
box_ls
#Leven's Test
variances_ls <- ls_comp %>%
group_by(SITE) %>%
summarize(
mean = mean(SIZE),
sd = sd(SIZE),
variance = var(SIZE)
)
View(variances_ls)
# assume equal variance
levene_ls <- leveneTest(SIZE ~ SITE, data = ls_comp)
levene_ls
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 8.3893 1.065e-06 ***
## 1663
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Ho: No difference in variances (Variances are equal)
# Ha: Variances are NOT equal
# There issignificant differences in variances between SITES ? (or use robust SE) 16 Can we still use anova if L-test not affected ? or Welsh ?
#ANOVA One-Way
#H0: All means are equall
#HA: At least two mean are not equal
ls_anova <- aov(SIZE ~ SITE, data = ls_comp)
ls_sum <- summary(ls_anova)
ls_sum
## Df Sum Sq Mean Sq F value Pr(>F)
## SITE 4 2355 588.6 3.424 0.0085 **
## Residuals 1663 285871 171.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#At least two means are NOT equal ?
# Which ones ?
ls_tukey <- TukeyHSD(ls_anova)
ls_tukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SIZE ~ SITE, data = ls_comp)
##
## $SITE
## diff lwr upr p adj
## CARP-AQUE -1.6657352 -6.24294710 2.911477 0.8582355
## IVEE-AQUE -2.4433772 -7.05292315 2.166169 0.5968998
## MOHK-AQUE -1.8955224 -7.02720717 3.236162 0.8514711
## NAPL-AQUE 2.3366205 -3.19311600 7.866357 0.7775633
## IVEE-CARP -0.7776420 -2.76097123 1.205687 0.8216104
## MOHK-CARP -0.2297872 -3.23309697 2.773523 0.9995765
## NAPL-CARP 4.0023556 0.36042398 7.644287 0.0228728
## MOHK-IVEE 0.5478548 -2.50450730 3.600217 0.9882889
## NAPL-IVEE 4.7799976 1.09751057 8.462485 0.0037001
## NAPL-MOHK 4.2321429 -0.08607271 8.550358 0.0579286
# Table comparing ??
stats_ls <- ls_comp %>%
group_by(SITE) %>%
summarize(
mean = round(mean(SIZE),2),
sd = round(sd(SIZE),2))
figure_ls <- ggplot(stats_ls, aes(x = SITE, y = mean)) +
geom_col(colour = NA, fill = "#FF6666", width = 0.5) +
geom_errorbar(aes(ymax = mean + sd, ymin = mean - sd), width = 0.1)+
scale_y_continuous(expand = c(0,0), limits = c(0,120))+
labs(y=expression(Mean~Lobster~Size~~(m)))+
scale_x_discrete() +
xlab("\nResearch Sites")+
geom_signif(comparisons = list(c("NAPL","CARP")), annotations = "p = 0.023", y_position = 95, tip_length = 0.1, size = 0.5, textsize = 3)+
geom_signif(comparisons = list(c("NAPL","IVEE")), annotations = "p = 0.004", y_position = 110, tip_length = 0.1, size = 0.5, textsize = 3)+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(face ="bold"))+
ggtitle("Lobster Size Distribution at SBC LTER Sites")
figure_ls
#numer of samples for each site
ls_comp %>% count(SITE)
## # A tibble: 5 x 2
## SITE n
## <fct> <int>
## 1 AQUE 67
## 2 CARP 705
## 3 IVEE 606
## 4 MOHK 178
## 5 NAPL 112
Figure XX. Lobsrer Size Destribution at SBC LTER SITES. Mean lobster size(mm) for locations AQUE (n=67), CARP (n=705), IVEE (n=606), MOHK (n=178), and NAPL (n=122) Santa Barbara County. Error bars indicate +/- 1 standard deviation. Brackets indicate p-values for significantly different means as determined by one-way ANOVA with Tukey’s HSD (F(3,42) = ???, p < 0.001, with \(\alpha\) = 0.05 throughout). Data source: Santa Barbara Coastal (bc.lternet.edu/cgi-bin/showDataset.cgi?docid=knb-lter-sbc.77)
At Isla Vista and Naples Reef, the two protected MPA sites (with zero fishing pressure), how do lobster sizes in 2012 and 2017 compare? At the non-MPA sites?
#create dataframe to compare lobster size in 2012 and 2017 at MPA and non-MPA sites
MPA_df <- size_abund %>%
filter(YEAR == "2012" | YEAR == "2017", SIZE != "-99999") %>%
mutate(
MPA_STATUS = case_when(
SITE == "IVEE" ~ "MPA",
SITE == "NAPL" ~ "MPA",
SITE == "AQUE" ~ "NON-MPA",
SITE == "CARP" ~ "NON-MPA",
SITE == "MOHK" ~ "NON-MPA")) %>%
select(YEAR, SITE, SIZE, COUNT, MPA_STATUS)
#arrange data so that each lobster has its own row
MPAdf <- as.data.frame(expand.dft(MPA_df, freq = "COUNT"))
#View(MPAdf)
#create data summary frame of lobster size median, max, mean, SD, and sample size
MPA_table <- as.data.frame(expand.dft(size_abund, freq = "COUNT")) %>%
group_by(YEAR) %>% #group data together by same month
summarize(
median = round(median(SIZE),2), #find median, round to 2 digits
max = max(SIZE), #identify maximum
mean = round(mean(SIZE),2), #find mean, round to 2 digits
stdev = round(sd(SIZE),2), #find SD, round to 2 digits
length(SIZE)) #find sample size
#View(MPA_table)
#Create exloratory histograms and qq plots
#histogram to compare 2012 and 2017 lobster sizes
MPAhist <- ggplot(MPAdf, aes(x = SIZE)) +
geom_histogram(aes(bins=15, fill = SITE)) +
facet_wrap(~YEAR ~MPA_STATUS, scale = "free") +
theme_classic()
## Warning: Ignoring unknown aesthetics: bins
MPAhist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#qq plot to compare 2012 and 2017 lobster sizes
MPAqq <- ggplot(MPAdf, aes(sample = SIZE)) +
geom_qq(aes(color = SITE)) +
facet_wrap(~YEAR ~MPA_STATUS, scale = "free") +
theme_classic()
MPAqq
#create data frames to perform statistical tests to answer the question above
#AQUE dataframe
AQUEdf <- MPAdf %>%
filter(SITE == "AQUE")
#View(AQUEdf)
#CARP datafram
CARPdf <- MPAdf %>%
filter(SITE == "CARP")
#View(CARPdf)
#MOHK dataframe
MOHKdf <- MPAdf %>%
filter(SITE == "MOHK")
#View(MOHKdf)
#IVEE dataframe
IVEEdf <- MPAdf %>%
filter(SITE == "IVEE")
#View(IVEEdf)
#NAPL dataframe
NAPLdf <- MPAdf %>%
filter(SITE == "NAPL")
#View(NAPLdf)
########################
mpa_sites <- MPAdf %>%
filter(MPA_STATUS == "MPA")
#View(mpa_sites)
nonmpa_sites <- MPAdf %>%
filter(MPA_STATUS == "NON-MPA")
#View(nonmpa_sites)
post_sites <- MPAdf %>%
filter(YEAR == "2017")
#View(post_sites)
#AQUE site (comparing 2012 and 2017 sizes)
#F-Test for equal variances
#AQUE_ftest <- AQUEdf %>%
#var.test(SIZE ~ YEAR, data = .)
#AQUE_ftest
#RESULT: Variances are NOT equal
#T-test
AQUE_ttest <- AQUEdf %>%
t.test(SIZE ~ YEAR, data =.)
#AQUE_ttest
#NOT significantly different (p-value = 0.1907)
#CARP site (comparing 2012 and 2017 sizes)
#F-Test for equal variances
#CARP_ftest <- CARPdf %>%
#var.test(SIZE ~ YEAR, data = .)
#CARP_ftest
#RESULT: Variances are NOT equal
#T-test
CARP_ttest <- CARPdf %>%
t.test(SIZE ~ YEAR, data =.)
#CARP_ttest
#NOT significantly different (p-value = 0.2211)
#MOHK site (comparing 2012 and 2017 sizes)
#F-Test for equal variances
#MOHK_ftest <- MOHKdf %>%
#var.test(SIZE ~ YEAR, data = .)
#MOHK_ftest
#RESULT: Variances are NOT equal
#T-test
MOHK_ttest <- MOHKdf %>%
t.test(SIZE ~ YEAR, data =.)
#MOHK_ttest
#SIGNIFICANTLY DIFFERENT (p-value = 0.0001599)
#IVEE site (comparing 2012 and 2017 sizes)
#F-Test for equal variances
#IVEE_ftest <- IVEEdf %>%
#var.test(SIZE ~ YEAR, data = .)
#IVEE_ftest
#RESULT: Variances are NOT equal
#T-test
IVEE_ttest <- IVEEdf %>%
t.test(SIZE ~ YEAR, data =.)
#IVEE_ttest
#SIGNIFICANTLY DIFFERENT (p-value = 0.0361)
#NAPL site (comparing 2012 and 2017 sizes)
#F-Test for equal variances
#NAPL_ftest <- NAPLdf %>%
#var.test(SIZE ~ YEAR, data = .)
#NAPL_ftest
#RESULT: Variances are NOT equal
#T-test
NAPL_ttest <- NAPLdf %>%
t.test(SIZE ~ YEAR, data =.)
#NAPL_ttest
#NOT significantly different (p-value = 0.5373)
####
#Comparing lobster size at ALL MPA sites (IVEE and NAPL) in 2012 and 2017
####
#F Test for equal variance
#H0: Ratio of variances is = 1
#HA: Ratio of variances is NOT = 1
mpa_ftest <- mpa_sites %>%
var.test(SIZE ~ YEAR, data = .)
mpa_ftest
##
## F test to compare two variances
##
## data: SIZE by YEAR
## F = 0.75323, num df = 31, denom df = 717, p-value = 0.3346
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.477719 1.341900
## sample estimates:
## ratio of variances
## 0.7532319
# Test determined variances are NOT equal
#Two sample two sided T-Test
#H0: The difference between the means = 0
#HA: The difference between the means is NOT = 0
mpa_ttest <- mpa_sites %>%
t.test(SIZE ~ YEAR, data =., var.equal = TRUE)
mpa_ttest
##
## Two Sample t-test
##
## data: SIZE by YEAR
## t = -1.9159, df = 748, p-value = 0.05576
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.7644724 0.1189292
## sample estimates:
## mean in group 2012 mean in group 2017
## 67.37500 72.19777
#NOT SIGNIFICANTLY DIFFERENT (p-value = 0.05576)
###
#Comparing lobster size at ALL non-MPA sites (CARP, MOHK, and AQUE) in 2012 and 2017
###
#F Test for equal variance
#H0: Ratio of variances is = 1
#HA: Ratio of variances is NOT = 1
nonmpa_ftest <- nonmpa_sites %>%
var.test(SIZE ~ YEAR, data = .)
nonmpa_ftest
##
## F test to compare two variances
##
## data: SIZE by YEAR
## F = 0.99085, num df = 198, denom df = 949, p-value = 0.953
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8037718 1.2406929
## sample estimates:
## ratio of variances
## 0.9908519
# Test determined variances are NOT equal
#Two sample two sided T-Test
#H0: The difference between the means = 0
#HA: The difference between the means is NOT = 0
nonmpa_ttest <- nonmpa_sites %>%
t.test(SIZE ~ YEAR, data =., var.equal = TRUE)
nonmpa_ttest
##
## Two Sample t-test
##
## data: SIZE by YEAR
## t = 2.6973, df = 1147, p-value = 0.007093
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.7143078 4.5265173
## sample estimates:
## mean in group 2012 mean in group 2017
## 74.92462 72.30421
#SIGNIFICANTLY DIFFERENT (p-value = 0.007093)
###
#Comparing lobster size at MPA vs. Non-MPA sites in 2017
###
#Create finalized data table
sizesum <- MPAdf %>%
group_by(SITE, YEAR) %>%
summarize(
mean = round(mean(SIZE), 1),
stdev = round(mean(SIZE), 1),
length(SIZE))
sizesum
## # A tibble: 10 x 5
## # Groups: SITE [?]
## SITE YEAR mean stdev `length(SIZE)`
## <fct> <int> <dbl> <dbl> <int>
## 1 AQUE 2012 71 71 38
## 2 AQUE 2017 73.9 73.9 67
## 3 CARP 2012 74.4 74.4 78
## 4 CARP 2017 72.2 72.2 705
## 5 IVEE 2012 66.1 66.1 26
## 6 IVEE 2017 71.5 71.5 606
## 7 MOHK 2012 77.3 77.3 83
## 8 MOHK 2017 72 72 178
## 9 NAPL 2012 73 73 6
## 10 NAPL 2017 76.2 76.2 112
kable(sizesum, col.names = c("Site", "Year", "Mean (mm)", "SD (mm)", "sample size"), caption = "**Table 1**. California spiny lobster (*Panulirus interruptus*) carapace length (mm) at five Santa Barbara Channel reefs in 2012 and 2017. Source: Santa Barbara Coastal Long Term Ecological Research") %>%
kable_styling(bootstrap_options = c("striped"), full_width = F) %>%
collapse_rows(columns = 1)
| Site | Year | Mean (mm) | SD (mm) | sample size |
|---|---|---|---|---|
| AQUE | 2012 | 71.0 | 71.0 | 38 |
| 2017 | 73.9 | 73.9 | 67 | |
| CARP | 2012 | 74.4 | 74.4 | 78 |
| 2017 | 72.2 | 72.2 | 705 | |
| IVEE | 2012 | 66.1 | 66.1 | 26 |
| 2017 | 71.5 | 71.5 | 606 | |
| MOHK | 2012 | 77.3 | 77.3 | 83 |
| 2017 | 72.0 | 72.0 | 178 | |
| NAPL | 2012 | 73.0 | 73.0 | 6 |
| 2017 | 76.2 | 76.2 | 112 |